This notebook reformats the accident data for pycharts. It aggregates the total number of accidents per borough at three granularities (per date, per year, and per year/month) and saves each resulting table to its own CSV file.
In [1]:
import numpy as np
import pandas as pd
In [2]:
# Read the raw NYPD motor-vehicle collision records from the CSV export into a DataFrame.
accident_data = pd.read_csv(filepath_or_buffer='../../NYPD_Motor_Vehicle_Collisions.csv')
In [12]:
# Parse the DATE column into real datetimes so that sorting is chronological, order the
# rows by that parsed date, and derive YEAR / MONTH columns for the coarser aggregations.
accident_data = (
    accident_data
    .assign(PARSED_DATE=lambda frame: pd.to_datetime(frame['DATE']))
    .sort_values(by=['PARSED_DATE'])
    .assign(
        YEAR=lambda frame: frame['PARSED_DATE'].dt.year,
        MONTH=lambda frame: frame['PARSED_DATE'].dt.month,
    )
)
In [53]:
# Count accidents per (date, borough) pair, then pivot the borough level into columns so
# that each row is one date and each column is one borough.
# fill_value=0 records an explicit zero for any date on which a borough has no rows;
# without it those cells become NaN, which also upcasts the count columns to float.
# NOTE(review): groupby drops rows whose BOROUGH is missing by default, so such
# accidents are not counted in any column -- confirm this is intended.
timeseries_data = (
    accident_data
    .groupby(['PARSED_DATE', 'BOROUGH'])
    .size()
    .unstack(level=-1, fill_value=0)
)
In [54]:
# Show the per-date, per-borough count table as this cell's output for a visual check.
timeseries_data
Out[54]:
In [55]:
# Write the per-date borough counts to the CSV file under the website's data directory.
timeseries_data.to_csv(path_or_buf='../website/data/accident_timeseries_data.csv')
In [13]:
# Count accidents per (year, borough) pair, then pivot the borough level into columns so
# that each row is one year and each column is one borough.
# fill_value=0 records an explicit zero for any year in which a borough has no rows;
# without it those cells become NaN, which also upcasts the count columns to float.
# NOTE(review): groupby drops rows whose BOROUGH is missing by default, so such
# accidents are not counted in any column -- confirm this is intended.
timeseries_yearly_data = (
    accident_data
    .groupby(['YEAR', 'BOROUGH'])
    .size()
    .unstack(level=-1, fill_value=0)
)
In [14]:
# Show the per-year, per-borough count table as this cell's output for a visual check.
timeseries_yearly_data
Out[14]:
In [15]:
# Write the per-year borough counts to the CSV file under the website's data directory.
timeseries_yearly_data.to_csv(path_or_buf='../website/data/accident_timeseries_yearly_data.csv')
In [16]:
# Count accidents per (year, month, borough) triple, then pivot the borough level into
# columns so that each row is one year/month and each column is one borough.
# fill_value=0 records an explicit zero for any year/month in which a borough has no
# rows; without it those cells become NaN, which also upcasts the counts to float.
# NOTE(review): groupby drops rows whose BOROUGH is missing by default, so such
# accidents are not counted in any column -- confirm this is intended.
timeseries_monthly_data = (
    accident_data
    .groupby(['YEAR', 'MONTH', 'BOROUGH'])
    .size()
    .unstack(level=-1, fill_value=0)
)
In [17]:
# Show the per-year/month, per-borough count table as this cell's output for a visual check.
timeseries_monthly_data
Out[17]:
In [18]:
# Write the per-year/month borough counts to the CSV file under the website's data directory.
timeseries_monthly_data.to_csv(path_or_buf='../website/data/accident_timeseries_monthly_data.csv')